• Installing packages
  • import pandas
  • Series
  • DataFrame df
  • Reading data from web: pandas_datareader
  • datetime
  • Display DataFrame
  • pd.read_csv, df.to_csv
  • df.head(), df.tail()
  • Get columns: df[[0,3]], df['High'], Single columns Series objects
  • Get rows: df['2012-03'], df.loc, df.iloc, df.ix
  • Concatenate:
  • Merge: Inner and outer joins

Installing Packages and setting up the environment

source activate py27

source activate py35 source activate py36

conda update matplotlib

conda install pandas-datareader --update

Creating and working with Series and DataFrame


In [1]:
import pandas as pd

ids = [100, 200, 300, 301, 308]
names  = ['Ali', 'Veli', 'Ayse', 'Fatma', 'Gamze']
surnames  = ['Yilmaz', 'Gorali', 'Tasci', 'Bakkaloglu', 'Yilmaz']
ages   = [27,32,19,28,32]
gender = ['M','M','F','F','F']
city = ['Istanbul', 'Istanbul', 'Ankara', 'Istanbul', 'Izmir']

number_plate = [('Adana','01'), ('Eskisehir','26'), ('Istanbul', '34'), ('Ankara', '06'), ('Izmir', '35'), ('Denizli','20') ]
s = pd.Series(names)

In [2]:
df = pd.DataFrame(data={'Name': names, 'Surname': surnames, 'Age': ages, 'Gender': gender, 'City': city},
                  index=ids)
#df1 = df[[3,0,2,1]]
df1 = df
df = pd.DataFrame(number_plate, columns=['City','Plate'])
df2 = df

In [3]:
df1


Out[3]:
Age City Gender Name Surname
100 27 Istanbul M Ali Yilmaz
200 32 Istanbul M Veli Gorali
300 19 Ankara F Ayse Tasci
301 28 Istanbul F Fatma Bakkaloglu
308 32 Izmir F Gamze Yilmaz

In [4]:
df3 = pd.DataFrame(data={'Name': names, 'Surname': surnames},index=range(1,len(names)+1))
df3['Surname']


Out[4]:
1        Yilmaz
2        Gorali
3         Tasci
4    Bakkaloglu
5        Yilmaz
Name: Surname, dtype: object

In [5]:
df2


Out[5]:
City Plate
0 Adana 01
1 Eskisehir 26
2 Istanbul 34
3 Ankara 06
4 Izmir 35
5 Denizli 20

In [6]:
data = [('Kadikoy', 40.001, 29.37), ('Etiler', 41.002, 29.38), ('Yesilkoy', 40.376, 28.97) ]

mahalle = pd.DataFrame(data, columns=['Place','Lat','Lon'])
mahalle[['Lat','Lon','Place']]


Out[6]:
Lat Lon Place
0 40.001 29.37 Kadikoy
1 41.002 29.38 Etiler
2 40.376 28.97 Yesilkoy

In [7]:
aapl = pd.read_csv("aapl.csv", index_col=0, parse_dates=True)
aapl


Out[7]:
Open High Low Close Volume Adj Close
Date
2012-01-03 409.399998 412.499989 408.999989 411.230000 75555200 53.784500
2012-01-04 410.000011 414.680012 409.279995 413.440010 65005500 54.073546
2012-01-05 414.950005 418.550007 412.670006 418.029995 67817400 54.673867
2012-01-06 419.770008 422.749996 419.220009 422.400002 79573200 55.245417
2012-01-09 425.499992 427.750004 421.349991 421.730000 98506100 55.157788
2012-01-10 425.909988 426.000004 421.500008 423.239994 64549100 55.355279
2012-01-11 422.680008 422.849998 419.309998 422.549992 53771200 55.265034
2012-01-12 422.279999 422.900013 418.750011 421.389992 53146800 55.113319
2012-01-13 419.699993 420.449997 418.659996 419.810009 56505400 54.906674
2012-01-17 424.199989 425.989990 422.959988 424.700001 60724300 55.546233
2012-01-18 426.959999 429.469990 426.300011 429.110008 69197800 56.123015
2012-01-19 430.150005 431.370007 426.510002 427.750004 65434600 55.945140
2012-01-20 427.489998 427.500011 419.750008 420.300007 103493600 54.970760
2012-01-23 422.669994 428.449993 422.299999 427.409996 76515600 55.900671
2012-01-24 425.100010 425.100010 419.550003 420.409996 136909500 54.985146
2012-01-25 454.439987 454.449974 443.729996 446.659996 239578500 58.418366
2012-01-26 448.360008 448.789978 443.139996 444.629990 80996300 58.152863
2012-01-27 444.339996 448.480011 443.769997 447.280010 74927300 58.499457
2012-01-30 445.709988 453.900002 445.390007 453.009995 94835300 59.248878
2012-01-31 455.589973 458.240021 453.070023 456.479980 97920900 59.702715
2012-02-01 458.410011 458.989998 455.550026 456.189987 67511500 59.664787
2012-02-02 455.899994 457.169983 453.980003 455.120003 46699100 59.524845
2012-02-03 457.300026 460.000008 455.560013 459.680000 71649900 60.121244
2012-02-06 458.379997 464.979988 458.200020 463.969978 62353200 60.682327
2012-02-07 465.250008 469.749977 464.579979 468.830009 79055900 61.317967
2012-02-08 470.500008 476.789978 469.699989 476.680016 101972500 62.344664
2012-02-09 480.760002 496.750008 480.559998 493.170006 221053700 64.501379
2012-02-10 490.960022 497.619987 488.549980 493.419998 157825500 64.534076
2012-02-13 499.529991 503.830009 497.089989 502.600021 129304000 65.734725
2012-02-14 504.659988 509.560020 502.000008 509.459991 115099600 66.631936
... ... ... ... ... ... ...
2012-11-15 537.530006 539.499985 522.620026 525.619987 197477700 69.355985
2012-11-16 525.200005 530.000008 505.750000 527.680008 316723400 69.627806
2012-11-19 540.709999 567.499985 539.880020 565.730011 205829400 74.648535
2012-11-20 571.910019 571.950020 554.580009 560.909981 160688500 74.012528
2012-11-21 564.249977 567.369995 556.599976 561.700012 93250500 74.116773
2012-11-23 567.169991 572.000008 562.600006 571.500023 68206600 75.409892
2012-11-26 575.900017 589.999992 573.710007 589.530022 157644900 77.788965
2012-11-27 589.549995 590.419975 580.100006 584.780006 133332500 77.162197
2012-11-28 577.269981 585.800003 572.259987 582.940018 130216100 76.919409
2012-11-29 590.220024 594.250023 585.249977 589.359978 128674700 77.766528
2012-11-30 586.789986 588.400009 582.679985 585.279991 97829900 77.228170
2012-12-03 593.650009 594.590004 585.500023 586.190025 91070000 77.348250
2012-12-04 581.800018 581.800018 572.129997 575.849976 139267100 75.983872
2012-12-05 568.910004 569.249985 538.769981 538.790009 261159500 71.093779
2012-12-06 528.940010 553.310020 518.629974 547.239975 294303100 72.208759
2012-12-07 553.400009 555.199997 530.000008 533.250015 196760200 70.362773
2012-12-10 525.000000 538.510002 521.580002 529.819977 157621100 69.910177
2012-12-11 539.770004 549.559975 537.370003 541.390015 148086400 71.436852
2012-12-12 547.769974 547.999992 536.270004 539.000000 121786000 71.121488
2012-12-13 531.149994 537.640022 525.800018 529.689987 156314900 69.893024
2012-12-14 514.749992 518.129990 505.580009 509.789986 252394800 67.267203
2012-12-17 508.929993 519.999992 501.230003 518.829979 189401800 68.460037
2012-12-18 525.000000 534.899986 520.249985 533.900017 156421300 70.448541
2012-12-19 531.470001 533.700012 525.499985 526.309990 112342300 69.447031
2012-12-20 530.000008 530.200012 518.880020 521.730019 120422400 68.842700
2012-12-21 512.469994 519.669998 510.239983 519.330017 149067100 68.526018
2012-12-24 520.350014 524.250023 518.709976 520.169983 43938300 68.636852
2012-12-26 519.000023 519.460007 511.120003 512.999992 75609100 67.690766
2012-12-27 513.539978 516.250000 504.659988 515.060013 113780100 67.962587
2012-12-28 510.290024 514.480026 508.119987 509.589981 88569600 67.240812

249 rows × 6 columns


In [8]:
u = pd.read_clipboard()

In [9]:
u


Out[9]:
Date Open High Low Close Volume Adj Close
0 2012-01-03 409.399998 412.499989 408.999989 411.230000 75555200 53.784500
1 2012-01-04 410.000011 414.680012 409.279995 413.440010 65005500 54.073546
2 2012-01-05 414.950005 418.550007 412.670006 418.029995 67817400 54.673867
3 2012-01-06 419.770008 422.749996 419.220009 422.400002 79573200 55.245417
4 2012-01-09 425.499992 427.750004 421.349991 421.730000 98506100 55.157788
5 2012-01-10 425.909988 426.000004 421.500008 423.239994 64549100 55.355279
6 2012-01-11 422.680008 422.849998 419.309998 422.549992 53771200 55.265034
7 2012-01-12 422.279999 422.900013 418.750011 421.389992 53146800 55.113319
8 2012-01-13 419.699993 420.449997 418.659996 419.810009 56505400 54.906674
9 2012-01-17 424.199989 425.989990 422.959988 424.700001 60724300 55.546233
10 2012-01-18 426.959999 429.469990 426.300011 429.110008 69197800 56.123015
11 2012-01-19 430.150005 431.370007 426.510002 427.750004 65434600 55.945140
12 2012-01-20 427.489998 427.500011 419.750008 420.300007 103493600 54.970760
13 2012-01-23 422.669994 428.449993 422.299999 427.409996 76515600 55.900671
14 2012-01-24 425.100010 425.100010 419.550003 420.409996 136909500 54.985146
15 2012-01-25 454.439987 454.449974 443.729996 446.659996 239578500 58.418366
16 2012-01-26 448.360008 448.789978 443.139996 444.629990 80996300 58.152863
17 2012-01-27 444.339996 448.480011 443.769997 447.280010 74927300 58.499457
18 2012-01-30 445.709988 453.900002 445.390007 453.009995 94835300 59.248878
19 2012-01-31 455.589973 458.240021 453.070023 456.479980 97920900 59.702715
20 2012-02-01 458.410011 458.989998 455.550026 456.189987 67511500 59.664787
21 2012-02-02 455.899994 457.169983 453.980003 455.120003 46699100 59.524845
22 2012-02-03 457.300026 460.000008 455.560013 459.680000 71649900 60.121244
23 2012-02-06 458.379997 464.979988 458.200020 463.969978 62353200 60.682327
24 2012-02-07 465.250008 469.749977 464.579979 468.830009 79055900 61.317967
25 2012-02-08 470.500008 476.789978 469.699989 476.680016 101972500 62.344664
26 2012-02-09 480.760002 496.750008 480.559998 493.170006 221053700 64.501379
27 2012-02-10 490.960022 497.619987 488.549980 493.419998 157825500 64.534076
28 2012-02-13 499.529991 503.830009 497.089989 502.600021 129304000 65.734725
29 2012-02-14 504.659988 509.560020 502.000008 509.459991 115099600 66.631936
... ... ... ... ... ... ... ...
219 2012-11-15 537.530006 539.499985 522.620026 525.619987 197477700 69.355985
220 2012-11-16 525.200005 530.000008 505.750000 527.680008 316723400 69.627806
221 2012-11-19 540.709999 567.499985 539.880020 565.730011 205829400 74.648535
222 2012-11-20 571.910019 571.950020 554.580009 560.909981 160688500 74.012528
223 2012-11-21 564.249977 567.369995 556.599976 561.700012 93250500 74.116773
224 2012-11-23 567.169991 572.000008 562.600006 571.500023 68206600 75.409892
225 2012-11-26 575.900017 589.999992 573.710007 589.530022 157644900 77.788965
226 2012-11-27 589.549995 590.419975 580.100006 584.780006 133332500 77.162197
227 2012-11-28 577.269981 585.800003 572.259987 582.940018 130216100 76.919409
228 2012-11-29 590.220024 594.250023 585.249977 589.359978 128674700 77.766528
229 2012-11-30 586.789986 588.400009 582.679985 585.279991 97829900 77.228170
230 2012-12-03 593.650009 594.590004 585.500023 586.190025 91070000 77.348250
231 2012-12-04 581.800018 581.800018 572.129997 575.849976 139267100 75.983872
232 2012-12-05 568.910004 569.249985 538.769981 538.790009 261159500 71.093779
233 2012-12-06 528.940010 553.310020 518.629974 547.239975 294303100 72.208759
234 2012-12-07 553.400009 555.199997 530.000008 533.250015 196760200 70.362773
235 2012-12-10 525.000000 538.510002 521.580002 529.819977 157621100 69.910177
236 2012-12-11 539.770004 549.559975 537.370003 541.390015 148086400 71.436852
237 2012-12-12 547.769974 547.999992 536.270004 539.000000 121786000 71.121488
238 2012-12-13 531.149994 537.640022 525.800018 529.689987 156314900 69.893024
239 2012-12-14 514.749992 518.129990 505.580009 509.789986 252394800 67.267203
240 2012-12-17 508.929993 519.999992 501.230003 518.829979 189401800 68.460037
241 2012-12-18 525.000000 534.899986 520.249985 533.900017 156421300 70.448541
242 2012-12-19 531.470001 533.700012 525.499985 526.309990 112342300 69.447031
243 2012-12-20 530.000008 530.200012 518.880020 521.730019 120422400 68.842700
244 2012-12-21 512.469994 519.669998 510.239983 519.330017 149067100 68.526018
245 2012-12-24 520.350014 524.250023 518.709976 520.169983 43938300 68.636852
246 2012-12-26 519.000023 519.460007 511.120003 512.999992 75609100 67.690766
247 2012-12-27 513.539978 516.250000 504.659988 515.060013 113780100 67.962587
248 2012-12-28 510.290024 514.480026 508.119987 509.589981 88569600 NaN

249 rows × 7 columns


In [50]:
pd.merge(df1, df2, on='City',  how='inner').sort_values('Name')


Out[50]:
Age City Gender Name Surname Plate
0 27 Istanbul M Ali Yilmaz 34
3 19 Ankara F Ayse Tasci 06
2 28 Istanbul F Fatma Bakkaloglu 34
4 32 Izmir F Gamze Yilmaz 35
1 32 Istanbul M Veli Gorali 34

In [146]:
pd.merge(df1, df2, how='outer').sort_values('Plate')


Out[146]:
Age City Gender Name Surname Plate
5 NaN Adana NaN NaN NaN 01
3 19.0 Ankara F Ayse Tasci 06
7 NaN Denizli NaN NaN NaN 20
6 NaN Eskisehir NaN NaN NaN 26
0 27.0 Istanbul M Ali Yilmaz 34
1 32.0 Istanbul M Veli Gorali 34
2 28.0 Istanbul F Fatma Bakkaloglu 34
4 32.0 Izmir F Gamze Yilmaz 35

In [113]:
df1.sort_values('Age')


Out[113]:
Name Age Gender City
2 Ayse 19 F Ankara
0 Ali 27 M Istanbul
3 Fatma 28 F Istanbul
1 Veli 32 M Istanbul
4 Gamze 32 F Izmir

In [114]:
df1.sort_values('City')


Out[114]:
Name Age Gender City
2 Ayse 19 F Ankara
0 Ali 27 M Istanbul
1 Veli 32 M Istanbul
3 Fatma 28 F Istanbul
4 Gamze 32 F Izmir

In [149]:
df1['Surname'].value_counts().sort_values()


Out[149]:
Bakkaloglu    1
Gorali        1
Tasci         1
Yilmaz        2
Name: Surname, dtype: int64

In [150]:
df1['Gender'].value_counts().sort_values()


Out[150]:
M    2
F    3
Name: Gender, dtype: int64

In [153]:
df1.index.get_loc(300)


Out[153]:
2

In [154]:
df1.at[100, 'Name']


Out[154]:
'Ali'

In [157]:
print(df1)
df1.iat[0,1]


     Age      City Gender   Name     Surname
100   27  Istanbul      M    Ali      Yilmaz
200   32  Istanbul      M   Veli      Gorali
300   19    Ankara      F   Ayse       Tasci
301   28  Istanbul      F  Fatma  Bakkaloglu
308   32     Izmir      F  Gamze      Yilmaz
Out[157]:
'Istanbul'

In [159]:
df1[(df1.Age<30) & (df1.City == 'Istanbul')]


Out[159]:
Age City Gender Name Surname
100 27 Istanbul M Ali Yilmaz
301 28 Istanbul F Fatma Bakkaloglu

Subframe


In [174]:
sdf1 = df1[['Name','Surname']].ix[100:300]


Out[174]:
Name Surname
100 Ali Yilmaz
200 Veli Gorali
300 Ayse Tasci

In [193]:
s  =pd.Series([3.2,3,1,10,4],index=['A','B','C','D','E'])
print(s)
q = s.reindex(['A','C','E','F'])
print(q)

u = pd.DataFrame({'s': s, 'q': q})


A     3.2
B     3.0
C     1.0
D    10.0
E     4.0
dtype: float64
A    3.2
C    1.0
E    4.0
F    NaN
dtype: float64

In [203]:
#letters = [(c,chr(c)) for c in range(65,91)  ]
letters = [chr(c) for c in range(65,91)  ]
letters
u.reindex(letters)


Out[203]:
q s
A 3.2 3.2
B NaN 3.0
C 1.0 1.0
D NaN 10.0
E 4.0 4.0
F NaN NaN
G NaN NaN
H NaN NaN
I NaN NaN
J NaN NaN
K NaN NaN
L NaN NaN
M NaN NaN
N NaN NaN
O NaN NaN
P NaN NaN
Q NaN NaN
R NaN NaN
S NaN NaN
T NaN NaN
U NaN NaN
V NaN NaN
W NaN NaN
X NaN NaN
Y NaN NaN
Z NaN NaN

In [22]:
df1


Out[22]:
Age City Gender Name Surname
100 27 Istanbul M Ali Yilmaz
200 32 Istanbul M Veli Gorali
300 19 Ankara F Ayse Tasci
301 28 Istanbul F Fatma Bakkaloglu
308 32 Izmir F Gamze Yilmaz

In [23]:
df1.query('Age<30')


Out[23]:
Age City Gender Name Surname
100 27 Istanbul M Ali Yilmaz
300 19 Ankara F Ayse Tasci
301 28 Istanbul F Fatma Bakkaloglu

In [48]:
tmp = pd.melt(df1, id_vars=['Age', 'City'])
tmp
tmp.pivot(values='value')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-48-4dda58716fd1> in <module>()
      1 tmp = pd.melt(df1, id_vars=['Age', 'City'])
      2 tmp
----> 3 tmp.pivot(values='value')

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/frame.pyc in pivot(self, index, columns, values)
   3949         """
   3950         from pandas.core.reshape.reshape import pivot
-> 3951         return pivot(self, index=index, columns=columns, values=values)
   3952 
   3953     def stack(self, level=-1, dropna=True):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/reshape/reshape.pyc in pivot(self, index, columns, values)
    375             index = self[index]
    376         indexed = Series(self[values].values,
--> 377                          index=MultiIndex.from_arrays([index, self[columns]]))
    378         return indexed.unstack(columns)
    379 

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   2060             return self._getitem_multilevel(key)
   2061         else:
-> 2062             return self._getitem_column(key)
   2063 
   2064     def _getitem_column(self, key):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   2067         # get column
   2068         if self.columns.is_unique:
-> 2069             return self._get_item_cache(key)
   2070 
   2071         # duplicate columns & possible reduce dimensionality

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1532         res = cache.get(item)
   1533         if res is None:
-> 1534             values = self._data.get(item)
   1535             res = self._box_item_values(item, values)
   1536             cache[item] = res

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   3597                         loc = indexer.item()
   3598                     else:
-> 3599                         raise ValueError("cannot label index with a null key")
   3600 
   3601             return self.iget(loc, fastpath=fastpath)

ValueError: cannot label index with a null key

In [54]:
df1.sample(2)


Out[54]:
Age City Gender Name Surname
300 19 Ankara F Ayse Tasci
100 27 Istanbul M Ali Yilmaz

In [25]:
df1.filter(regex='.ender')


Out[25]:
Gender
100 M
200 M
300 F
301 F
308 F

In [26]:
df1['City'].value_counts()


Out[26]:
Istanbul    3
Izmir       1
Ankara      1
Name: City, dtype: int64

In [27]:
df1.Gender.value_counts()


Out[27]:
F    3
M    2
Name: Gender, dtype: int64

In [36]:
df1


Out[36]:
Age City Gender Name Surname
100 27 Istanbul M Ali Yilmaz
200 32 Istanbul M Veli Gorali
300 19 Ankara F Ayse Tasci
301 28 Istanbul F Fatma Bakkaloglu
308 32 Izmir F Gamze Yilmaz

Grouping


In [44]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc
import pandas as pd

gb = df1.groupby('Gender')
gb.rank(method='dense')


Out[44]:
Age City Name Surname
100 1.0 1.0 1.0 2.0
200 2.0 1.0 2.0 1.0
300 1.0 1.0 1.0 2.0
301 2.0 2.0 2.0 1.0
308 3.0 3.0 3.0 3.0

Lets load some data from the web


In [199]:
import pandas as pd
import pandas_datareader as web


import datetime

start = datetime.datetime(2015, 1, 1)
end = datetime.datetime(2017, 12, 15)
msft = web.DataReader("MSFT", 'yahoo', start, end)
aapl = web.DataReader("AAPL", 'yahoo', start, end)

Display


In [56]:
print(msft[['Open','Close']])


                 Open      Close
Date                            
2014-12-31  46.730000  46.450001
2015-01-02  46.660000  46.759998
2015-01-05  46.369999  46.330002
2015-01-06  46.380001  45.650002
2015-01-07  45.980000  46.230000
2015-01-08  46.750000  47.590000
2015-01-09  47.610001  47.189999
2015-01-12  47.419998  46.599998
2015-01-13  46.970001  46.360001
2015-01-14  45.959999  45.959999
2015-01-15  46.220001  45.480000
2015-01-16  45.310001  46.240002
2015-01-20  46.299999  46.389999
2015-01-21  45.939999  45.919998
2015-01-22  46.380001  47.130001
2015-01-23  47.360001  47.180000
2015-01-26  47.000000  47.009998
2015-01-27  42.950001  42.660000
2015-01-28  42.740002  41.189999
2015-01-29  40.930000  42.009998
2015-01-30  41.549999  40.400002
2015-02-02  40.590000  41.279999
2015-02-03  41.630001  41.599998
2015-02-04  41.939999  41.840000
2015-02-05  42.220001  42.450001
2015-02-06  42.680000  42.410000
2015-02-09  42.240002  42.360001
2015-02-10  42.740002  42.599998
2015-02-11  42.650002  42.380001
2015-02-12  42.660000  43.090000
...               ...        ...
2017-10-03  74.669998  74.260002
2017-10-04  74.089996  74.690002
2017-10-05  75.220001  75.970001
2017-10-06  75.669998  76.000000
2017-10-09  75.970001  76.290001
2017-10-10  76.330002  76.290001
2017-10-11  76.360001  76.419998
2017-10-12  76.489998  77.120003
2017-10-13  77.589996  77.489998
2017-10-16  77.419998  77.650002
2017-10-17  77.470001  77.589996
2017-10-18  77.669998  77.610001
2017-10-19  77.570000  77.910004
2017-10-20  78.320000  78.809998
2017-10-23  78.989998  78.830002
2017-10-24  78.900002  78.860001
2017-10-25  78.580002  78.629997
2017-10-26  79.199997  78.760002
2017-10-27  84.370003  83.809998
2017-10-30  83.699997  83.889999
2017-10-31  84.360001  83.180000
2017-11-01  83.680000  83.180000
2017-11-02  83.349998  84.050003
2017-11-03  84.080002  84.139999
2017-11-06  84.199997  84.470001
2017-11-07  84.769997  84.269997
2017-11-08  84.139999  84.559998
2017-11-09  84.110001  84.089996
2017-11-10  83.790001  83.870003
2017-11-13  83.660004  83.599998

[724 rows x 2 columns]

Optionally save to a csv file


In [3]:
msft.to_csv("msft.csv")
aapl.to_csv("aapl.csv")

Read from file


In [7]:
msft2 = pd.read_csv("msft.csv", index_col=0, parse_dates=True)
aapl2 = pd.read_csv("aapl.csv", index_col=0, parse_dates=True)

Check that data is the same


In [11]:
print(msft2.head())
print(msft.head())


                 Open       High        Low      Close    Volume  Adj Close
Date                                                                       
2012-01-03  26.549999  26.959999  26.389999  26.770000  64731500  23.461752
2012-01-04  26.820000  27.469999  26.780001  27.400000  80516100  24.013895
2012-01-05  27.379999  27.730000  27.290001  27.680000  56081400  24.259293
2012-01-06  27.530001  28.190001  27.530001  28.110001  99455500  24.636154
2012-01-09  28.049999  28.100000  27.719999  27.740000  59706800  24.311878
                 Open       High        Low      Close    Volume  Adj Close
Date                                                                       
2012-01-03  26.549999  26.959999  26.389999  26.770000  64731500  23.461752
2012-01-04  26.820000  27.469999  26.780001  27.400000  80516100  24.013895
2012-01-05  27.379999  27.730000  27.290001  27.680000  56081400  24.259293
2012-01-06  27.530001  28.190001  27.530001  28.110001  99455500  24.636154
2012-01-09  28.049999  28.100000  27.719999  27.740000  59706800  24.311878

Get columns. Each column is a series object.


In [16]:
Open = msft[[0]]
High = msft['High']
print(Open[0:3])


                 Open
Date                 
2012-01-03  26.549999
2012-01-04  26.820000
2012-01-05  27.379999

In [37]:
msftA01 = msft.loc['2012-01'][['Adj Close']]
msftA02 = msft['2012-02'][['Adj Close']]
aaplA01 = aapl['2012-01'][['Adj Close']]

msftAV = msft['2012-01'][['Adj Close','Volume']]
aaplAV = aapl['2012-01'][['Adj Close','Volume']]

In [64]:
msftA01


Out[64]:
Adj Close
Date
2012-01-03 23.461752
2012-01-04 24.013895
2012-01-05 24.259293
2012-01-06 24.636154
2012-01-09 24.311878
2012-01-10 24.399520
2012-01-11 24.294349
2012-01-12 24.539747
2012-01-13 24.758852
2012-01-17 24.767617
2012-01-18 24.741323
2012-01-19 24.644918
2012-01-20 26.038424
2012-01-23 26.055953
2012-01-24 25.714150
2012-01-25 25.906961
2012-01-26 25.854377
2012-01-27 25.617743
2012-01-30 25.950783
2012-01-31 25.880670

In [84]:
msft[['Volume','Close']]


Out[84]:
Volume Close
Date
2012-01-03 64731500 26.770000
2012-01-04 80516100 27.400000
2012-01-05 56081400 27.680000
2012-01-06 99455500 28.110001
2012-01-09 59706800 27.740000
2012-01-10 60014400 27.840000
2012-01-11 65582400 27.719999
2012-01-12 49370800 28.000000
2012-01-13 60196100 28.250000
2012-01-17 72395300 28.260000
2012-01-18 64860600 28.230000
2012-01-19 74053500 28.120001
2012-01-20 165902900 29.709999
2012-01-23 76078100 29.730000
2012-01-24 51703300 29.340000
2012-01-25 59231700 29.559999
2012-01-26 49102800 29.500000
2012-01-27 44187700 29.230000
2012-01-30 51114800 29.610001
2012-01-31 50572400 29.530001
2012-02-01 67409900 29.889999
2012-02-02 52223300 29.950001
2012-02-03 41838500 30.240000
2012-02-06 28039700 30.200001
2012-02-07 39242400 30.350000
2012-02-08 49659100 30.660000
2012-02-09 50481600 30.770000
2012-02-10 44605300 30.500000
2012-02-13 33319800 30.580000
2012-02-14 59644000 30.250000
... ... ...
2012-11-15 50955600 26.660000
2012-11-16 64083300 26.520000
2012-11-19 57179300 26.730000
2012-11-20 47070400 26.709999
2012-11-21 66360300 26.950001
2012-11-23 57845700 27.700001
2012-11-26 85198700 27.389999
2012-11-27 45018600 27.080000
2012-11-28 53018400 27.360001
2012-11-29 69551400 26.950001
2012-11-30 83690200 26.620001
2012-12-03 53173800 26.430000
2012-12-04 49777500 26.370001
2012-12-05 68283800 26.670000
2012-12-06 39182300 26.730000
2012-12-07 46162100 26.459999
2012-12-10 47031100 26.940001
2012-12-11 52282800 27.320000
2012-12-12 43966300 27.240000
2012-12-13 45080100 27.110001
2012-12-14 42077500 26.809999
2012-12-17 42046100 27.100000
2012-12-18 50486900 27.559999
2012-12-19 53519900 27.309999
2012-12-20 52607300 27.680000
2012-12-21 98776500 27.450001
2012-12-24 20842400 27.059999
2012-12-26 31631100 26.860001
2012-12-27 39394000 26.959999
2012-12-28 28239900 26.549999

249 rows × 2 columns

Concatenate


In [22]:
pd.concat([msftA01.head(3), msftA02.head(3)])


Out[22]:
Adj Close
Date
2012-01-03 23.461752
2012-01-04 24.013895
2012-01-05 24.259293
2012-02-01 26.196180
2012-02-02 26.248766
2012-02-03 26.502927

In [29]:
withDups = pd.concat([aaplA01[:3], msftA01[:3]])
print(withDups)

withDups.ix['2012-01-04']


            Adj Close
Date                 
2012-01-03  53.784500
2012-01-04  54.073546
2012-01-05  54.673867
2012-01-03  23.461752
2012-01-04  24.013895
2012-01-05  24.259293
Out[29]:
Adj Close
Date
2012-01-04 54.073546
2012-01-04 24.013895

Multiindex


In [44]:
withDups = pd.concat([aaplA01[:3], msftA01[:3]],keys=['AAPL', 'MSFT'])

print(withDups)
print(withDups.ix['AAPL'])


                 Adj Close
     Date                 
AAPL 2012-01-03  53.784500
     2012-01-04  54.073546
     2012-01-05  54.673867
MSFT 2012-01-03  23.461752
     2012-01-04  24.013895
     2012-01-05  24.259293
            Adj Close
Date                 
2012-01-03  53.784500
2012-01-04  54.073546
2012-01-05  54.673867

In [56]:
u = pd.concat([msftAV, aaplAV], keys = ['MSFT','AAPL'])
u.loc['MSFT'].loc['2012-01']


Out[56]:
Adj Close Volume
Date
2012-01-03 23.461752 64731500
2012-01-04 24.013895 80516100
2012-01-05 24.259293 56081400
2012-01-06 24.636154 99455500
2012-01-09 24.311878 59706800
2012-01-10 24.399520 60014400
2012-01-11 24.294349 65582400
2012-01-12 24.539747 49370800
2012-01-13 24.758852 60196100
2012-01-17 24.767617 72395300
2012-01-18 24.741323 64860600
2012-01-19 24.644918 74053500
2012-01-20 26.038424 165902900
2012-01-23 26.055953 76078100
2012-01-24 25.714150 51703300
2012-01-25 25.906961 59231700
2012-01-26 25.854377 49102800
2012-01-27 25.617743 44187700
2012-01-30 25.950783 51114800
2012-01-31 25.880670 50572400

Merge: Inner and outer joins


In [16]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.figure(figsize=(15,3))
plt.plot(aapl[['Adj Close']])
plt.show()



In [29]:
msft.columns


Out[29]:
Index([u'Open', u'High', u'Low', u'Close', u'Volume', u'Adj Close'], dtype='object')

In [33]:
msft2 = pd.read_csv("msft.csv", index_col=0, parse_dates=True)
msft2


Out[33]:
Open High Low Close Volume AdjClose
Date
2012-01-03 26.549999 26.959999 26.389999 26.770000 64731500 23.461752
2012-01-04 26.820000 27.469999 26.780001 27.400000 80516100 24.013895
2012-01-05 27.379999 27.730000 27.290001 27.680000 56081400 24.259293
2012-01-06 27.530001 28.190001 27.530001 28.110001 99455500 24.636154
2012-01-09 28.049999 28.100000 27.719999 27.740000 59706800 24.311878
2012-01-10 27.930000 28.150000 27.750000 27.840000 60014400 24.399520
2012-01-11 27.430000 27.980000 27.370001 27.719999 65582400 24.294349
2012-01-12 27.870001 28.020000 27.650000 28.000000 49370800 24.539747
2012-01-13 27.930000 28.250000 27.790001 28.250000 60196100 24.758852
2012-01-17 28.400000 28.650000 28.170000 28.260000 72395300 24.767617
2012-01-18 28.309999 28.400000 27.969999 28.230000 64860600 24.741323
2012-01-19 28.160000 28.440001 28.030001 28.120001 74053500 24.644918
2012-01-20 28.820000 29.740000 28.750000 29.709999 165902900 26.038424
2012-01-23 29.549999 29.950001 29.350000 29.730000 76078100 26.055953
2012-01-24 29.469999 29.570000 29.180000 29.340000 51703300 25.714150
2012-01-25 29.070000 29.650000 29.070000 29.559999 59231700 25.906961
2012-01-26 29.610001 29.700001 29.400000 29.500000 49102800 25.854377
2012-01-27 29.450001 29.530001 29.170000 29.230000 44187700 25.617743
2012-01-30 28.969999 29.620001 28.830000 29.610001 51114800 25.950783
2012-01-31 29.660000 29.700001 29.230000 29.530001 50572400 25.880670
2012-02-01 29.790001 30.049999 29.760000 29.889999 67409900 26.196180
2012-02-02 29.900000 30.170000 29.709999 29.950001 52223300 26.248766
2012-02-03 30.139999 30.400000 30.090000 30.240000 41838500 26.502927
2012-02-06 30.040001 30.219999 29.969999 30.200001 28039700 26.467871
2012-02-07 30.150000 30.490000 30.049999 30.350000 39242400 26.599334
2012-02-08 30.260000 30.670000 30.219999 30.660000 49659100 26.871023
2012-02-09 30.680000 30.799999 30.480000 30.770000 50481600 26.967430
2012-02-10 30.639999 30.799999 30.360001 30.500000 44605300 26.730796
2012-02-13 30.629999 30.770000 30.430000 30.580000 33319800 26.800910
2012-02-14 30.330000 30.459999 29.850000 30.250000 59644000 26.686226
... ... ... ... ... ... ...
2012-11-15 26.879999 26.969999 26.629999 26.660000 50955600 24.026140
2012-11-16 26.670000 26.700001 26.340000 26.520000 64083300 23.899971
2012-11-19 26.799999 26.799999 26.469999 26.730000 57179300 24.089224
2012-11-20 26.760000 26.799999 26.459999 26.709999 47070400 24.071199
2012-11-21 26.709999 27.170000 26.670000 26.950001 66360300 24.287490
2012-11-23 27.230000 27.770000 27.200001 27.700001 57845700 24.963394
2012-11-26 27.540001 27.580000 27.170000 27.389999 85198700 24.684019
2012-11-27 27.360001 27.379999 27.040001 27.080000 45018600 24.404646
2012-11-28 27.010000 27.389999 26.770000 27.360001 53018400 24.656984
2012-11-29 27.110001 27.360001 26.860001 26.950001 69551400 24.287490
2012-11-30 27.049999 27.129999 26.490000 26.620001 83690200 23.990092
2012-12-03 26.780001 26.820000 26.400000 26.430000 53173800 23.818863
2012-12-04 26.500000 26.629999 26.340000 26.370001 49777500 23.764791
2012-12-05 26.379999 26.930000 26.260000 26.670000 68283800 24.035152
2012-12-06 26.809999 26.980000 26.610001 26.730000 39182300 24.089224
2012-12-07 26.820000 26.820000 26.370001 26.459999 46162100 23.845898
2012-12-10 26.559999 26.969999 26.520000 26.940001 47031100 24.278478
2012-12-11 27.049999 27.490000 27.049999 27.320000 52282800 24.620935
2012-12-12 27.530001 27.620001 27.080000 27.240000 43966300 24.548839
2012-12-13 27.320000 27.520000 26.950001 27.110001 45080100 24.431683
2012-12-14 27.110001 27.129999 26.700001 26.809999 42077500 24.161320
2012-12-17 26.790001 27.219999 26.680000 27.100000 42046100 24.422670
2012-12-18 27.250000 27.629999 27.139999 27.559999 50486900 24.837224
2012-12-19 27.690001 27.730000 27.250000 27.309999 53519900 24.611923
2012-12-20 27.360001 27.680000 27.150000 27.680000 52607300 24.945370
2012-12-21 27.450001 27.490000 27.000000 27.450001 98776500 24.738093
2012-12-24 27.200001 27.250000 27.000000 27.059999 20842400 24.386621
2012-12-26 27.030001 27.200001 26.700001 26.860001 31631100 24.206381
2012-12-27 26.889999 27.090000 26.570000 26.959999 39394000 24.296501
2012-12-28 26.709999 26.900000 26.549999 26.549999 28239900 23.927006

249 rows × 6 columns


In [53]:
plt.plot(aapl['2018-02-03':'2018-05-07']['Volume'])


plt.plot(y['Volume'])
plt.show()



In [69]:
x = aapl['2012-02-05':'2012-02-23']['Open']
y = aapl['2012-02-05':'2012-02-23']['Close']
Vol = aapl['2012-02-05':'2012-02-23']['Volume']

plt.plot(x)
plt.plot(y)
plt.legend()
plt.show()



In [78]:
import numpy as np
plt.plot(np.log(np.array(x.ix[1:10])/np.array(x.ix[0:9])))
plt.show()


Write a program that visualizes a candle stick plot from aapl or msft


In [85]:
plt.plot(np.log(aapl['Adj Close']).diff())
plt.show()



In [96]:
#np.log(aapl['Adj Close']).diff().hist()

x = msft['Adj Close']

np.log(x).diff().hist(bins=100)
plt.gca().set_xlim((-0.2,0.2))
plt.show()



In [200]:
plt.figure(figsize=(10,8))
msft['Open'].rolling(100).mean().plot()
#msft['Open'].expanding().mean().plot()
msft['Open'].plot()

plt.show()



In [71]:
msft.plot.scatter(x='Open', y ='Close')
plt.show()



In [195]:
u = msft[0:5]

In [198]:
u.shift(-1)


Out[198]:
Open High Low Close Adj Close Volume
Date
2014-12-31 46.660000 47.419998 46.540001 46.759998 43.555000 27913900.0
2015-01-02 46.369999 46.730000 46.250000 46.330002 43.154472 39673900.0
2015-01-05 46.380001 46.750000 45.540001 45.650002 42.521076 36447900.0
2015-01-06 45.980000 46.459999 45.490002 46.230000 43.061325 29114100.0
2015-01-07 NaN NaN NaN NaN NaN NaN

In [79]:
plt.scatter(msft['Open'], msft['Open'].shift(10))
plt.axis('equal')
plt.show()



In [76]:
df1.Age.shift(-1)


Out[76]:
100    32.0
200    19.0
300    28.0
301    32.0
308     NaN
Name: Age, dtype: float64

In [80]:
import this


The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

In [190]:
goog = web.DataReader('GOOG', data_source='google',start='3/14/2009', end='4/14/2014')
goog.tail()


Out[190]:
Open High Low Close Volume
Date
2017-11-07 1027.27 1033.97 1025.13 1033.33 1112331
2017-11-08 1030.52 1043.52 1028.45 1039.85 1088716
2017-11-09 1033.99 1033.99 1019.67 1031.26 1245246
2017-11-10 1026.46 1030.76 1025.28 1028.07 720676
2017-11-13 1023.42 1031.58 1022.57 1025.75 885779

In [191]:
goog.Open.plot()
plt.show()



In [194]:
goog['Log_return'] = np.log(goog['Close']/goog['Close'].shift(-1))
goog['Volatality'] = goog.Log_return.rolling(10).std()
goog.Volatality.plot()
plt.show()



In [98]:
import pandas as pd
plaka = ['01','03','06','16','26','32','34','55','67']
df = pd.DataFrame(data={'City': ['Adana', 'Afyon','Ankara','Bursa','Eskisehir','Isparta','Istanbul','Samsun','Zonguldak'],\
                         'MegaCity': [True, False, False, True, False, False, True, False, False]}, index=plaka)

In [99]:
df


Out[99]:
City MegaCity
01 Adana True
03 Afyon False
06 Ankara False
16 Bursa True
26 Eskisehir False
32 Isparta False
34 Istanbul True
55 Samsun False
67 Zonguldak False

In [100]:
df2 = pd.DataFrame(data={'City': ['Icel', 'Antalya'],\
                         'MegaCity': [False, True]}, index=['33','07'])

In [101]:
df2


Out[101]:
City MegaCity
33 Icel False
07 Antalya True

In [104]:
sehirler = pd.concat([df, df2])

In [105]:
sehirler


Out[105]:
City MegaCity
01 Adana True
03 Afyon False
06 Ankara False
16 Bursa True
26 Eskisehir False
32 Isparta False
34 Istanbul True
55 Samsun False
67 Zonguldak False
33 Icel False
07 Antalya True

In [107]:
sehirler['NumOfCustomers'] = np.random.randint(1,100,11)

In [115]:
sehirler[sehirler.MegaCity == False].NumOfCustomers.sum()


Out[115]:
302L

In [121]:
sehirler.query('City in ["A","a"]')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-121-cee04bd141bb> in <module>()
----> 1 sehirler.query('str(City)[0] in ["A","a"]')

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/frame.pyc in query(self, expr, inplace, **kwargs)
   2212         kwargs['level'] = kwargs.pop('level', 0) + 1
   2213         kwargs['target'] = None
-> 2214         res = self.eval(expr, **kwargs)
   2215 
   2216         try:

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/frame.pyc in eval(self, expr, inplace, **kwargs)
   2282             kwargs['target'] = self
   2283         kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers)
-> 2284         return _eval(expr, inplace=inplace, **kwargs)
   2285 
   2286     def select_dtypes(self, include=None, exclude=None):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/eval.pyc in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    260 
    261         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env,
--> 262                            truediv=truediv)
    263 
    264         # construct the engine and evaluate the parsed expression

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in __init__(self, expr, engine, parser, env, truediv, level)
    725         self.env.scope['truediv'] = truediv
    726         self._visitor = _parsers[parser](self.env, self.engine, self.parser)
--> 727         self.terms = self.parse()
    728 
    729     @property

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in parse(self)
    742     def parse(self):
    743         """Parse an expression"""
--> 744         return self._visitor.visit(self.expr)
    745 
    746     @property

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_Module(self, node, **kwargs)
    317             raise SyntaxError('only a single expression is allowed')
    318         expr = node.body[0]
--> 319         return self.visit(expr, **kwargs)
    320 
    321     def visit_Expr(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_Expr(self, node, **kwargs)
    320 
    321     def visit_Expr(self, node, **kwargs):
--> 322         return self.visit(node.value, **kwargs)
    323 
    324     def _rewrite_membership_op(self, node, left, right):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_Compare(self, node, **kwargs)
    642             op = self.translate_In(ops[0])
    643             binop = ast.BinOp(op=op, left=node.left, right=comps[0])
--> 644             return self.visit(binop)
    645 
    646         # recursive case: we have a chained comparison, a CMP b CMP c, etc.

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_BinOp(self, node, **kwargs)
    414 
    415     def visit_BinOp(self, node, **kwargs):
--> 416         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    417         left, right = self._maybe_downcast_constants(left, right)
    418         return self._maybe_evaluate_binop(op, op_class, left, right)

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in _maybe_transform_eq_ne(self, node, left, right)
    352     def _maybe_transform_eq_ne(self, node, left=None, right=None):
    353         if left is None:
--> 354             left = self.visit(node.left, side='left')
    355         if right is None:
    356             right = self.visit(node.right, side='right')

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_Subscript(self, node, **kwargs)
    451 
    452     def visit_Subscript(self, node, **kwargs):
--> 453         value = self.visit(node.value)
    454         slobj = self.visit(node.slice)
    455         result = pd.eval(slobj, local_dict=self.env, engine=self.engine,

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit(self, node, **kwargs)
    311         method = 'visit_' + node.__class__.__name__
    312         visitor = getattr(self, method)
--> 313         return visitor(node, **kwargs)
    314 
    315     def visit_Module(self, node, **kwargs):

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/expr.pyc in visit_Call_legacy(self, node, side, **kwargs)
    593                 # Check if this is a supported function name
    594                 try:
--> 595                     res = FuncNode(node.func.id)
    596                 except ValueError:
    597                     # Raise original error

/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/computation/ops.pyc in __init__(self, name)
    542         if name not in _mathops:
    543             raise ValueError(
--> 544                 "\"{0}\" is not a supported function".format(name))
    545         self.name = name
    546         self.func = getattr(np, name)

ValueError: "str" is not a supported function

In [118]:
lst = []
for i in range(len(sehirler)):
    if sehirler.City[i][0] == 'A':
        lst.append(True)
    else:
        lst.append(False)
        
sehirler[lst]


Out[118]:
City MegaCity NumOfCustomers
01 Adana True 71
03 Afyon False 21
06 Ankara False 7
07 Antalya True 20

In [127]:
for u in sehirler.iterrows():
    print(u[1].City)


Adana
Afyon
Ankara
Bursa
Eskisehir
Isparta
Istanbul
Samsun
Zonguldak
Icel
Antalya

In [138]:
for u in sehirler.iterrows():
    s = u[1].City
    if s[0] in ['A','a']:
        print(s)


Adana
Afyon
Ankara
Antalya

In [146]:
sehirler[sehirler.City.apply(lambda x: x[0] in ['A','a'])]


Out[146]:
City MegaCity NumOfCustomers
01 Adana True 71
03 Afyon False 21
06 Ankara False 7
07 Antalya True 20

In [150]:
sehirler.sample(frac=0.5)


Out[150]:
City MegaCity NumOfCustomers
07 Antalya True 20
32 Isparta False 99
01 Adana True 71
26 Eskisehir False 46
16 Bursa True 59
06 Ankara False 7

In [155]:
sehirler.groupby(by='MegaCity').count()


Out[155]:
City NumOfCustomers
MegaCity
False 7 7
True 4 4

In [156]:
sehirler['Flag'] = sehirler.NumOfCustomers.apply(lambda x: x>50)

In [157]:
sehirler


Out[157]:
City MegaCity NumOfCustomers Flag
01 Adana True 71 True
03 Afyon False 21 False
06 Ankara False 7 False
16 Bursa True 59 True
26 Eskisehir False 46 False
32 Isparta False 99 True
34 Istanbul True 73 True
55 Samsun False 69 True
67 Zonguldak False 11 False
33 Icel False 49 False
07 Antalya True 20 False

In [165]:
sehirler.drop(['Flag'],axis=1, inplace=True)

In [170]:
sehirler.loc['55'].MegaCity


/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/pandas/core/generic.py:2999: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value

In [175]:
sehirler.MegaCity['55']


Out[175]:
True

In [176]:
sehirler


Out[176]:
City MegaCity NumOfCustomers
01 Adana True 71
03 Afyon False 21
06 Ankara False 7
16 Bursa True 59
26 Eskisehir False 46
32 Isparta False 99
34 Istanbul True 73
55 Samsun True 69
67 Zonguldak False 11
33 Icel False 49
07 Antalya True 20

In [187]:
plt.figure(figsize=(10,5))
msft['Open'].plot()
plt.show()



In [182]:
msft['Open']['2015']


Out[182]:
Date
2015-01-02    46.660000
2015-01-05    46.369999
2015-01-06    46.380001
2015-01-07    45.980000
2015-01-08    46.750000
2015-01-09    47.610001
2015-01-12    47.419998
2015-01-13    46.970001
2015-01-14    45.959999
2015-01-15    46.220001
2015-01-16    45.310001
2015-01-20    46.299999
2015-01-21    45.939999
2015-01-22    46.380001
2015-01-23    47.360001
2015-01-26    47.000000
2015-01-27    42.950001
2015-01-28    42.740002
2015-01-29    40.930000
2015-01-30    41.549999
2015-02-02    40.590000
2015-02-03    41.630001
2015-02-04    41.939999
2015-02-05    42.220001
2015-02-06    42.680000
2015-02-09    42.240002
2015-02-10    42.740002
2015-02-11    42.650002
2015-02-12    42.660000
2015-02-13    43.380001
                ...    
2015-11-18    53.000000
2015-11-19    53.990002
2015-11-20    54.250000
2015-11-23    54.250000
2015-11-24    53.919998
2015-11-25    54.090000
2015-11-27    53.799999
2015-11-30    54.540001
2015-12-01    54.410000
2015-12-02    55.320000
2015-12-03    55.490002
2015-12-04    54.119999
2015-12-07    55.790001
2015-12-08    55.470001
2015-12-09    55.369999
2015-12-10    55.389999
2015-12-11    54.709999
2015-12-14    54.330002
2015-12-15    55.660000
2015-12-16    55.540001
2015-12-17    56.360001
2015-12-18    55.770000
2015-12-21    54.880001
2015-12-22    54.990002
2015-12-23    55.700001
2015-12-24    55.860001
2015-12-28    55.349998
2015-12-29    56.290001
2015-12-30    56.470001
2015-12-31    56.040001
Name: Open, Length: 252, dtype: float64

In [246]:
tmp = msft['2015-01-01':'2015-08-01']

m10 = tmp.Close.rolling(10).mean()

In [254]:
#tmp.Close.plot()
tmp.Close.rolling(5).mean().plot()
tmp.Close.rolling(30).mean().plot()


plt.show()



In [225]:
tmp.iloc[4]


Out[225]:
Open         4.675000e+01
High         4.775000e+01
Low          4.672000e+01
Close        4.759000e+01
Adj Close    4.410660e+01
Volume       2.964520e+07
Name: 2015-01-08 00:00:00, dtype: float64

In [228]:
a = tmp.Close.rolling(5).mean()
b = tmp.Close.rolling(30).mean()

x = a - b

In [274]:
import matplotlib.pylab as plt

#x[(np.sign(x.shift(1))*np.sign(x)) < 0].plot(marker='o',linestyle=None)

x.plot(color='g')
ax = plt.gca()
#ax.set_yticks(tmp2.index)
tmp2 = change[change<0]
tmp2.plot(marker='o',style=None)
ax.grid('on')


change = np.sign(x.shift(1))*np.sign(x)



In [270]:
ax.set_yticks(tmp2.index)


Out[270]:
[<matplotlib.axis.YTick at 0x11ba59e10>,
 <matplotlib.axis.YTick at 0x11c2e0f10>,
 <matplotlib.axis.YTick at 0x11c7d3190>,
 <matplotlib.axis.YTick at 0x11c7d3890>,
 <matplotlib.axis.YTick at 0x11c7c9b90>]

In [269]:
tmp2 = change[change<0]

tmp2


Out[269]:
Date
2015-02-25   -1.0
2015-03-06   -1.0
2015-04-16   -1.0
2015-06-02   -1.0
2015-07-16   -1.0
Name: Close, dtype: float64

In [266]:
[u.date() for u in idx]


Out[266]:
[datetime.date(2015, 2, 25),
 datetime.date(2015, 3, 6),
 datetime.date(2015, 4, 16),
 datetime.date(2015, 6, 2),
 datetime.date(2015, 7, 16)]

In [279]:
[i**3 for i in range(10)]


Out[279]:
[0, 1, 8, 27, 64, 125, 216, 343, 512, 729]

In [284]:
d = {i: i**3 for i in range(10)}

d


Out[284]:
{0: 0, 1: 1, 2: 8, 3: 27, 4: 64, 5: 125, 6: 216, 7: 343, 8: 512, 9: 729}

A simple object


In [31]:
def my_plot(x):
    plt.plot(x)
    plt.show()
    
x = [10,12,45]

my_plot(x)



In [7]:
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np

class my_obj(object):
    def __init__(self, x):
        self.x = np.random.randn(x)
    def plot(self):
        plt.plot(self.x)   
        plt.show()
    def __repr__(self):
        for i in range(len(self.x)):
            print(i, self.x[i])
            
        return str('Merhaba')
    
    
u = my_obj(10)  

u.plot()

u


0 0.891532240598
1 0.285991922807
2 -0.368639636438
3 1.39495880715
4 0.258725491743
5 -0.428671924989
6 0.0423727936349
7 -0.349209379967
8 -1.26714747667
9 0.321600117667
Out[7]:
Merhaba

In [17]:
class polynomial(object):
    def __init__(self, c, v):
        self.coeff = c
        self.v = v
    def __repr__(self):
        D = len(self.coeff)
        for i in range(D):
            if i<D-1:
                print(self.coeff[i], end='')
                print('{}^{} '.format(self.v, D-i-1), end='')
            else:
                print(self.coeff[i])
        
        return str(self.coeff)
    

p = polynomial([2,3,1], 'z')
p


2z^2 3z^1 1
Out[17]:
[2, 3, 1]

In [1]:
import pandas as pd
import pandas_datareader as web


import datetime

start = datetime.datetime(2017, 11, 1)
end = datetime.datetime(2017, 11, 30)
msft = web.DataReader("MSFT", 'yahoo', start, end)
aapl = web.DataReader("AAPL", 'yahoo', start, end)

In [29]:
import matplotlib.pyplot as plt
import numpy as np
u = aapl.Close - aapl.Open

thr = 0.5

plt.bar(u[u<-thr].index, u[u<-thr], color='red')
plt.bar(u[np.abs(u)<thr].index, u[np.abs(u)<thr], color='blue')
plt.bar(u[u>=thr].index, u[u>=thr], color='green')
plt.show()



In [24]:
x = [1,5,4]
y = [10,30,70]
plt.bar(x, y)
plt.show()